{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5850c318-81fe-4262-92b8-8692e10b809b",
   "metadata": {},
   "source": [
    "# 导包并定义函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1291a3f0-0447-4b31-af22-e0bf7c4c7ce4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T11:04:43.576784Z",
     "iopub.status.busy": "2024-11-11T11:04:43.576320Z",
     "iopub.status.idle": "2024-11-11T11:04:46.690474Z",
     "msg_id": "52061a11-2fb4-4d38-983f-16ff43359e28",
     "shell.execute_reply": "2024-11-11T11:04:46.689689Z",
     "shell.execute_reply.started": "2024-11-11T11:04:43.576752Z"
    }
   },
   "outputs": [],
   "source": [
    "run B榜复现_导包并定义函数.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "697f38d2-f889-4764-90ea-cbf9514b9f89",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T11:04:46.692494Z",
     "iopub.status.busy": "2024-11-11T11:04:46.691887Z",
     "iopub.status.idle": "2024-11-11T11:04:46.700968Z",
     "msg_id": "c75d8b50-9de1-439c-a9b2-34a5468b16f5",
     "shell.execute_reply": "2024-11-11T11:04:46.700203Z",
     "shell.execute_reply.started": "2024-11-11T11:04:46.692464Z"
    }
   },
   "outputs": [],
   "source": [
    "def BASIC_label_handle():\n",
    "    file_name = 'XW_ENTINFO_BASIC'\n",
    "    BASIC = get_data(file_name, num_rows=None)\n",
    "    temp_train = BASIC[BASIC['is_train'] == 1]\n",
    "    temp_b = BASIC[BASIC['is_train'] == 0]\n",
    "\n",
    "    data = BASIC[BASIC['is_train'] == 1]\n",
    "    a = temp_train['企业（机构）类型编码'].value_counts().index.to_list()\n",
    "    b = temp_b['企业（机构）类型编码'].value_counts().index.to_list()\n",
    "    res_qiyeleixing = []\n",
    "    for i in a:\n",
    "        if i not in b:\n",
    "            res_qiyeleixing.append(i)\n",
    "    data['是否在B榜企业类型里'] = data['企业（机构）类型编码'].apply(lambda x: 1 if x not in res_qiyeleixing else 0).astype(int)\n",
    "    \n",
    "    a = temp_train['国民经济行业代码'].value_counts().index.to_list()\n",
    "    b = temp_b['国民经济行业代码'].value_counts().index.to_list()\n",
    "    res_guominjingji = []\n",
    "    for i in a:\n",
    "        if i not in b:\n",
    "            res_guominjingji.append(i)\n",
    "    data['是否在B榜经济行业里'] = data['国民经济行业代码'].apply(lambda x: 1 if x not in res_guominjingji else 0).astype(int)\n",
    "\n",
    "    temp_b['是否在B榜企业类型里'], temp_b['是否在B榜经济行业里'] = 1, 1\n",
    "    res = pd.concat([data, temp_b])\n",
    "    res = res[['客户编号', '是否在B榜企业类型里', '是否在B榜经济行业里']]\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b743bd0d-f1fa-49c9-80eb-de91edd179d8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T11:04:46.702212Z",
     "iopub.status.busy": "2024-11-11T11:04:46.701819Z",
     "iopub.status.idle": "2024-11-11T11:04:47.734283Z",
     "msg_id": "326ff554-d519-4f35-b4aa-550ceddf5859",
     "shell.execute_reply": "2024-11-11T11:04:47.733472Z",
     "shell.execute_reply.started": "2024-11-11T11:04:46.702186Z"
    }
   },
   "outputs": [],
   "source": [
    "basic = BASIC_label_handle()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cbc90dd2-296e-4311-93c7-66ca86d8c85c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T11:04:53.823477Z",
     "iopub.status.busy": "2024-11-11T11:04:53.822985Z",
     "iopub.status.idle": "2024-11-11T11:04:53.855365Z",
     "msg_id": "88946aa1-5b7d-42d1-88e2-b43468748b51",
     "shell.execute_reply": "2024-11-11T11:04:53.854649Z",
     "shell.execute_reply.started": "2024-11-11T11:04:53.823445Z"
    }
   },
   "outputs": [],
   "source": [
    "basic.to_pickle(\"../data/基本信息表补充两个特征_B榜.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ee02e893-e200-45f1-8ec9-25de33db73e7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T11:05:43.861757Z",
     "iopub.status.busy": "2024-11-11T11:05:43.861258Z",
     "iopub.status.idle": "2024-11-11T11:05:43.903313Z",
     "msg_id": "3c4770ed-9624-48c1-b83d-18e4052b6861",
     "shell.execute_reply": "2024-11-11T11:05:43.902640Z",
     "shell.execute_reply.started": "2024-11-11T11:05:43.861726Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df = pd.read_pickle(\"../data/基本信息表补充两个特征_B榜.pkl\")\n",
    "# o_df = pd.read_pickle(\"/home/mole/work/heyuyang/1104_B榜开始/训练集和B榜企业类型经济行业对比打标签.pkl\")\n",
    "# df.equals(o_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0adeafec-3bdf-4bc6-8122-25e59a064df8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
